source("Scripts/libraries.R")
source("Scripts/variables.R")
source("Scripts/functions.R")rm(list=ls())
source("Scripts/libraries.R")
source("Scripts/variables.R")
source("Scripts/functions.R")load(paste0(IO$sympto_data_02_processed,'cycledays.Rdata'), verbose = TRUE)## Loading objects:
## cycledays
agg = aggregate(cycle_nb ~ user_id, cycledays,lu)
j = which(agg$cycle_nb >= 50)
user_ids = agg$user_id[j]
rm(j, agg)for(u in user_ids[1:10]){
k = which(cycledays$user_id == u)
plot.tracking.history(d = cycledays[k,], show_goal = TRUE, relative_date = TRUE)
}rm(u, k)u = 1226
k = which(cycledays$user_id == u)
plot.tracking.history(d = cycledays[k,], show_goal = TRUE, relative_date = TRUE)cycledays_sympto_user_history = cycledays[k,]
save(cycledays_sympto_user_history,file = paste0(IO$restricted_figure_data,"history_sympto.Rdata"))
rm(u, k, cycledays_sympto_user_history, cycledays, user_ids)source("Scripts/_setup_Kindara.R")## Loading required package: foreach
## Loading required package: iterators
##
## Attaching package: 'plotrix'
## The following object is masked from 'package:scales':
##
## rescale
##
## Attaching package: 'chron'
## The following object is masked from 'package:foreach':
##
## times
## The following objects are masked from 'package:lubridate':
##
## days, hours, minutes, seconds, years
rm(list=ls())
source("Scripts/libraries.R")
source("Scripts/variables.R")
source("Scripts/functions.R")load(file = paste0(IO$kindara_data_accounts,'processed_cleaned_accounts_onefile.Rdata'), verbose = TRUE)## Loading objects:
## accounts
round(mean(accounts$age_registration, na.rm = TRUE), digits = 1)## [1] 29.2
round(sd(accounts$age_registration, na.rm = TRUE), digits = 1)## [1] 5.9
round(100 - (sum(is.na(accounts$age_registration))/nrow(accounts)*100))## [1] 13
users_demo_kindara = accounts[,c("age_registration","age_now")]
save(users_demo_kindara,file = paste0(IO$restricted_figure_data,"users_demo_kindara.Rdata"))
rm(accounts, users_demo_kindara)source("Scripts/_setup_Sympto.R")load(paste0(IO$sympto_data_02_processed,'users.Rdata'), verbose = TRUE)## Loading objects:
## users
users_demo = users[,c("age_registration","age_now","menarche_year","cm","kg","bmi")]
users_demo$cm_o = users_demo$cm
users_demo$cm[which(users_demo$cm < 100)] = 100 + users_demo$cm[which(users_demo$cm < 100)]
users_demo$bmi_o = users_demo$bmi
users_demo$bmi = users$kg / ((users_demo$cm/100)^2)
summary(users_demo)## age_registration age_now menarche_year cm
## Min. : 0.00 Min. : 0.00 Min. : 8.00 Min. :100.0
## 1st Qu.:26.00 1st Qu.:29.00 1st Qu.:12.00 1st Qu.:160.0
## Median :29.00 Median :33.00 Median :13.00 Median :165.0
## Mean :29.94 Mean :33.52 Mean :12.85 Mean :165.3
## 3rd Qu.:34.00 3rd Qu.:38.00 3rd Qu.:14.00 3rd Qu.:170.0
## Max. :74.00 Max. :77.00 Max. :18.00 Max. :229.0
## NA's :2570 NA's :2570 NA's :2819 NA's :2735
## kg bmi cm_o bmi_o
## Min. : 20.00 Min. : 7.703 Min. : 50.0 Min. : 7.703
## 1st Qu.: 54.00 1st Qu.: 19.818 1st Qu.:160.0 1st Qu.:19.818
## Median : 60.00 Median : 21.631 Median :165.0 Median :21.630
## Mean : 62.23 Mean : 22.788 Mean :161.3 Mean :22.840
## 3rd Qu.: 67.00 3rd Qu.: 24.342 3rd Qu.:170.0 3rd Qu.:24.314
## Max. :149.00 Max. :103.939 Max. :229.0 Max. :86.565
## NA's :2765 NA's :2786 NA's :2735 NA's :3214
round(apply(users_demo, 2, sd, na.rm = TRUE), digits = 2)## age_registration age_now menarche_year cm
## 6.36 6.69 1.61 6.73
## kg bmi cm_o bmi_o
## 13.14 4.76 21.25 5.02
100 - round(apply(users_demo, 2, function(x) sum(is.na(x)))/nrow(users)*100)## age_registration age_now menarche_year cm
## 81 81 79 80
## kg bmi cm_o bmi_o
## 80 80 80 76
users_demo_sympto = users_demo
save(users_demo_sympto,file = paste0(IO$restricted_figure_data,"users_demo_sympto.Rdata"))
rm(users, users_demo, users_demo_sympto)nothing to be done - everything is in the FAM_figures.Rmd
source("Scripts/_setup_Kindara.R")folder = paste0(IO$kindara_data_days,"03 selected/")
days.file.list = list.files(folder)
days.file.list = paste0(folder,days.file.list )
rm(folder)cycleday_from_end = -28:-1
breaks = seq(-0.6,2,by = 0.1)-0.05
mids = breaks[2:length(breaks)]-0.05
registerDoParallel(par$n_cores)
tic()
temp.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'temp_diff_from_p25',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 42.615 sec elapsed
colnames(temp.hist) = cycleday_from_end
n.tot = colSums(temp.hist)
temp.hist.norm = t(t(temp.hist)/ n.tot )
delta_BBT_distribution_kindara = data.frame(cycleday = cycleday_from_end,
d = t(temp.hist.norm))
colnames(delta_BBT_distribution_kindara) = c('cycleday',round(mids, digits = 2))
write.csv(delta_BBT_distribution_kindara,
file = paste0(IO$output_csv, 'delta_BBT_distribution_kindara.csv'),
row.names = FALSE)
save(delta_BBT_distribution_kindara, file = paste0(IO$output_Rdata, "delta_BBT_distribution_kindara.Rdata"))
n.cum = apply(temp.hist, 2, cumsum)
n.med = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.5, n.cum = n.cum, n.tot = n.tot)
med = mids[n.med]
n.10 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.1, n.cum = n.cum, n.tot = n.tot)
n.25 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.25, n.cum = n.cum, n.tot = n.tot)
n.75 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.75, n.cum = n.cum, n.tot = n.tot)
n.90 = sapply(X = 1:length(cycleday_from_end),which.quant, pc = 0.9, n.cum = n.cum, n.tot = n.tot)
t.10 = mids[n.10]
t.25 = mids[n.25]
t.75 = mids[n.75]
t.90 = mids[n.90]
delta_BBT_summary_kindara = data.frame(cycleday_from_end = cycleday_from_end,
p.10 = round(t.10, digits = 2),
p.25 = round(t.25, digits = 2),
median = round(med, digits = 2),
p.75 = round(t.75, digits = 2),
p.90 = round(t.90, digits = 2))
write.csv(delta_BBT_summary_kindara,
file = paste0(IO$output_csv, 'delta_BBT_summary_kindara.csv'),
row.names = FALSE)
save(delta_BBT_summary_kindara, file = paste0(IO$output_Rdata, "delta_BBT_summary_kindara.Rdata"))
rm(delta_BBT_summary_kindara, t.90, t.75, t.25, t.10, n.90, n.75, n.25,n.10, med, n.med,delta_BBT_distribution_kindara,temp.hist.norm, n.tot,mids, breaks, cycleday_from_end, n.cum, temp.hist)breaks = c(-0.25,dict$bleeding$index +0.25)
cycleday_from_end = -15:-1
cycleday = 1:15
tic()
bleeding.hist.end = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'bleeding',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 18.849 sec elapsed
tic()
bleeding.hist.start = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'bleeding',
breaks = breaks,
from_start = TRUE,
cycleday_v = cycleday)}
toc()## 17.946 sec elapsed
n.cycles = max(apply(bleeding.hist.start,2,sum))
bleeding.hist.start.norm = bleeding.hist.start / n.cycles
bleeding.hist.end.norm = bleeding.hist.end / n.cycles
bleeding_distribution_kindara = data.frame(
cycleday = c(cycleday_from_end, cycleday),
no_bleeding_reported = c(as.numeric(bleeding.hist.end.norm[1,]),as.numeric(bleeding.hist.start.norm[1,])),
spotting = c(as.numeric(bleeding.hist.end.norm[2,]),as.numeric(bleeding.hist.start.norm[2,])),
light_bleeding = c(as.numeric(bleeding.hist.end.norm[3,]),as.numeric(bleeding.hist.start.norm[3,])),
medium_bleeding = c(as.numeric(bleeding.hist.end.norm[4,]),as.numeric(bleeding.hist.start.norm[4,])),
heavy_bleeding = c(as.numeric(bleeding.hist.end.norm[5,]),as.numeric(bleeding.hist.start.norm[5,]))
)
write.csv(bleeding_distribution_kindara,
file = paste0(IO$output_csv, 'bleeding_distribution_kindara.csv'),
row.names = FALSE)
save(bleeding_distribution_kindara, file = paste0(IO$output_Rdata, "bleeding_distribution_kindara.Rdata"))
rm(bleeding.hist.start.norm, bleeding.hist.end.norm, bleeding.hist.start, bleeding.hist.end, cycleday_from_end, cycleday, bleeding_distribution_kindara)breaks = 0:13-0.5
mids = breaks[2:length(breaks)]-0.5
cycleday_from_end = -28:-1
tic()
mucus.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'mucus',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 19.622 sec elapsed
mucus.hist.norm = mucus.hist/n.cycles
cervical_mucus_distribution_kindara = data.frame(cycleday = cycleday_from_end,
m = t(mucus.hist.norm))
colnames(cervical_mucus_distribution_kindara) = c('cycleday',dict$mucus$names[2:14])
write.csv(cervical_mucus_distribution_kindara,
file = paste0(IO$output_csv, 'cervical_mucus_distribution_kindara.csv'),
row.names = FALSE)
save(cervical_mucus_distribution_kindara, file = paste0(IO$output_Rdata, "cervical_mucus_distribution_kindara.Rdata"))
rm(mucus.hist, mucus.hist.norm, breaks, mids, cycleday_from_end, cervical_mucus_distribution_kindara)breaks = 1:4-0.5
cycleday_from_end = -28:-1
tic()
cervix.openness.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'cervix_openness',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 18.239 sec elapsed
cervix.openness.hist.norm = cervix.openness.hist/n.cycles
tic()
cervix.height.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'cervix_height',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 16.812 sec elapsed
cervix.height.hist.norm = cervix.height.hist/n.cycles
tic()
cervix.firmness.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'cervix_firmness',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 19.475 sec elapsed
cervix.firmness.hist.norm = cervix.firmness.hist/n.cycles
cervix_distribution_kindara = data.frame(cycleday = cycleday_from_end,
o = t(cervix.openness.hist.norm),
f = t(cervix.firmness.hist.norm),
h = t(cervix.height.hist.norm)
)
colnames(cervix_distribution_kindara) = c('cycleday','o_closed','o_medium','o_open',
'f_firm','f_medium','f_soft',
'h_low','h_medium','h_high')
write.csv(cervix_distribution_kindara,
file = paste0(IO$output_csv, 'cervix_distribution_kindara.csv'),
row.names = FALSE)
save(cervix_distribution_kindara, file = paste0(IO$output_Rdata, "cervix_distribution_kindara.Rdata"))
rm(cervix_distribution_kindara, cervix.firmness.hist.norm, cervix.firmness.hist, cervix.height.hist.norm, cervix.height.hist, cervix.openness.hist.norm, cervix.openness.hist, breaks, cycleday_from_end )breaks = 1:5-0.5
cycleday_from_end = -28:-1
tic()
vaginal_sensation.hist = foreach(days.file = days.file.list,.combine = combine.sum) %dopar%
{hist.by.cycleday(file = days.file,
attr = 'vaginal_sensation',
breaks = breaks,
from_start = FALSE,
cycleday_v = cycleday_from_end)}
toc()## 17.022 sec elapsed
vaginal_sensation.hist.norm = vaginal_sensation.hist/n.cycles
vaginal_sensation_distribution_kindara = data.frame(cycleday = cycleday_from_end,
s = t(vaginal_sensation.hist.norm))
colnames(vaginal_sensation_distribution_kindara) = c('cycleday',dict$feel$names[3:6])
write.csv(vaginal_sensation_distribution_kindara,
file = paste0(IO$output_csv, 'vaginal_sensation_distribution_kindara.csv'),
row.names = FALSE)
save(vaginal_sensation_distribution_kindara, file = paste0(IO$output_Rdata, "vaginal_sensation_distribution_kindara.Rdata"))
rm(vaginal_sensation_distribution_kindara, vaginal_sensation.hist, vaginal_sensation.hist.norm, breaks, cycleday_from_end)rm(days.file.list, n.cycles)source("Scripts/_setup_Sympto.R")load(paste0(IO$sympto_data_02_standard_cycles_with_HMM_res,"cycledays.Rdata"), verbose = TRUE)## Loading objects:
## cycledays
CC = cycledays
n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
cc = CC[i.end,]
uu = unique(cbind(cc$cycleday_from_end, cc$low_norm_temp), by = c('cycleday_from_end','low_norm_temp'))
uuu = aggregate(cc[,c('cycleday_from_end','low_norm_temp')], by = list(cc$cycleday_from_end,cc$low_norm_temp), length)
uuu = uuu[,1:3]
colnames(uuu) = c('cycleday_from_end','low_norm_temp','n')
uuu$density = uuu$n/max(uuu$n)
delta_BBT_distribution_sympto = uuu[order(uuu$cycleday_from_end,uuu$low_norm_temp),-3]
write.csv(delta_BBT_distribution_sympto,
file = paste0(IO$output_csv, 'delta_BBT_distribution_sympto.csv'),
row.names = FALSE)
save(delta_BBT_distribution_sympto, file = paste0(IO$output_Rdata,"delta_BBT_distribution_sympto.Rdata") )
quant = aggregate(low_norm_temp ~ cycleday_from_end, cc, quantile, seq(0,1,by=0.05))
delta_BBT_summary_sympto = data.frame(cycleday = quant$cycleday_from_end,
p.10 = round(quant$low_norm_temp[,3], digits = 3),
p.25 = round(quant$low_norm_temp[,6], digits = 3),
median = round(quant$low_norm_temp[,11], digits = 3),
p.75 = round(quant$low_norm_temp[,16], digits = 3),
p.90 = round(quant$low_norm_temp[,19], digits = 3)
)
write.csv(delta_BBT_summary_sympto,
file = paste0(IO$output_csv, 'delta_BBT_summary_sympto.csv'),
row.names = FALSE)
save(delta_BBT_summary_sympto, file = paste0(IO$output_Rdata,"delta_BBT_summary_sympto.Rdata") )
rm(delta_BBT_distribution_sympto, delta_BBT_summary_sympto, quant, uu, uuu, cc, i.end, i.start, n.days.in.cycle)obs = 'b'
n.days.in.cycle = 15
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
#bleeding from start
cc = CC[i.start,]
N = length(unique(cc$cycle_id))
breaks = seq(0.5, n.days.in.cycle+0.5, by = 1)
for(b in c(1,2,3)){
h = hist(cc$out_cycleday[cc$blood == b], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, blood = paste0(obs,'.',b),freq = h$counts/N )
if(b == 1){resB = res}else{resB = rbind(resB, res)}
}
resB$blood = factor(resB$blood)
resB = cast(resB, out_cycleday ~ blood )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resB) = c('cycleday','light_bleeding','medium_bleeding','high_bleeding')
resB.start = resB
#bleeding from end
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(b in c(1,2,3)){
h = hist(cc$cycleday_from_end[cc$blood == b], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, blood = paste0(obs,'.',b),freq = h$counts/N)
if(b == 1){resB = res}else{resB = rbind(resB, res)}
}
resB$blood = factor(resB$blood)
resB = cast(resB, out_cycleday ~ blood )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resB) = c('cycleday','light_bleeding','medium_bleeding','high_bleeding')
resB.end = resB
resB = rbind(resB.end, resB.start)
bleeding_distribution_sympto = resB
write.csv(bleeding_distribution_sympto,
file = paste0(IO$output_csv, 'bleeding_distribution_sympto.csv'),
row.names = FALSE)
save(bleeding_distribution_sympto, file = paste0(IO$output_Rdata, 'bleeding_distribution_sympto.Rdata'))
rm(res, resB, resB.end, resB.start, bleeding_distribution_sympto, N, b, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
obs = 'm'
#mucus from end
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(m in c(1,2,3,4)){
h = hist(cc$cycleday_from_end[cc$elixir == m], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, mucus = paste0(obs,'.',m),freq = h$counts/N )
if(m == 1){resM = res}else{resM = rbind(resM, res)}
}
resM$mucus = factor(resM$mucus )
resM = cast(resM, out_cycleday ~ mucus )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resM) = c('cycleday',dict$mucus$hmm.symbols[2:5])
cervical_mucus_distribution_sympto = resM
write.csv(cervical_mucus_distribution_sympto,
file = paste0(IO$output_csv, 'cervical_mucus_distribution_sympto.csv'),
row.names = FALSE)
save(cervical_mucus_distribution_sympto, file = paste0(IO$output_Rdata, 'cervical_mucus_distribution_sympto.Rdata'))
rm(res, resM, cervical_mucus_distribution_sympto, N, m, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)obs = 'c'
n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(c in c(11,12,13)){
h = hist(cc$cycleday_from_end[cc$feel == c], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, cervix = paste0(obs,'.',c),freq = h$counts/N )
if(c == 11){resC = res}else{resC = rbind(resC, res)}
}
resC$cervix = factor(resC$cervix )
resC = cast(resC, out_cycleday ~ cervix )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resC) = c('cycleday',dict$cervix$names[5:7])
cervix_distribution_sympto = resC
write.csv(cervix_distribution_sympto,
file = paste0(IO$output_csv, 'cervix_distribution_sympto.csv'),
row.names = FALSE)
save(cervix_distribution_sympto, file = paste0(IO$output_Rdata, 'cervix_distribution_sympto.Rdata'))
rm(res, resC, cervix_distribution_sympto, N, c, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)obs = "c"
n.days.in.cycle = 28
i.end = which(CC$cycleday_from_end >= -n.days.in.cycle)
i.start = which(CC$out_cycleday <= n.days.in.cycle)
cc = CC[i.end,]
N = length(unique(cc$cycle_id))
breaks = seq(-n.days.in.cycle-0.5, -0.5, by = 1)
for(c in c(1,2,3)){
h = hist(cc$cycleday_from_end[cc$feel == c], breaks = breaks, plot = FALSE)
res = data.frame(out_cycleday = h$mids, cervix = paste0(obs,'.',c),freq = h$counts/N )
if(c == 1){resC = res}else{resC = rbind(resC, res)}
}
resC$cervix = factor(resC$cervix )
resC = cast(resC, out_cycleday ~ cervix )## Using freq as value column. Use the value argument to cast to override this choice
colnames(resC) = c('cycleday',dict$cervix$names[2:4])
vaginal_sensation_distribution_sympto = resC
write.csv(vaginal_sensation_distribution_sympto,
file = paste0(IO$output_csv, 'vaginal_sensation_distribution_sympto.csv'),
row.names = FALSE)
save(vaginal_sensation_distribution_sympto, file = paste0(IO$output_Rdata, 'vaginal_sensation_distribution_sympto.Rdata'))
rm(res, resC, vaginal_sensation_distribution_sympto, N, c, n.days.in.cycle, i.start, i.end, h, cc, obs, breaks)